subway_cleaned = read_csv("Data/cleaned_subway_data.csv")
# Importing NYC map
nyc_map = st_read(here::here('NYC', 'nyc.shp'), quiet = TRUE)
nycmap = st_transform(nyc_map, crs = 4326)
subway_cleaned %>%
ggplot() +
geom_sf(
data = nyc_map, fill = NA
) +
geom_point(
aes(x = station_longitude, y = station_latitude, color = ada),
size = 2.5, alpha = 0.5) +
coord_sf() +
theme_void(base_size = 10) +
theme(legend.position = 'bottom') +
guides(color = guide_legend(
title.position = "top",
override.aes = list(size = 3))) +
scale_color_manual(values = c("FALSE" = "aquamarine3", "TRUE" = "slateblue3")) +
labs(color = "ADA Compliance")
subway_cleaned %>%
group_by(ada) %>%
count(ada) %>%
plot_ly (x = ~ada,
y = ~n,
color = ~ada,
type = "bar") %>%
layout(
xaxis = list(title = "Ada Complaince"),
yaxis = list(title = "Number of Stations")
)
## Warning in RColorBrewer::brewer.pal(N, "Set2"): minimal value for n is 3, returning requested palette with 3 different levels
## Warning in RColorBrewer::brewer.pal(N, "Set2"): minimal value for n is 3, returning requested palette with 3 different levels
subway_cleaned %>%
group_by(entrance_type) %>%
count(entrance_type) %>%
ungroup() %>%
mutate(entrance_type = fct_reorder(entrance_type, n, .desc = TRUE)) %>%
plot_ly (x = ~entrance_type, y = ~n,
color = ~entrance_type,
type = "bar", colors = "viridis") %>%
layout(
xaxis = list(title = "Entrance Type"),
yaxis = list(title = "Number of Stations")
)
subway_cleaned %>%
ggplot() +
geom_sf(
data = nyc_map, fill = NA
) +
geom_point(
aes(x = station_longitude, y = station_latitude, color = free_crossover),
size = 2.5, alpha = 0.5) +
coord_sf() +
theme_void(base_size = 10) +
theme(legend.position = 'bottom') +
guides(color = guide_legend(
title.position = "top",
override.aes = list(size = 3))) +
labs(color = "Free Crossover")
subway_cleaned %>%
group_by(free_crossover) %>%
count(free_crossover) %>%
plot_ly (x = ~free_crossover,
y = ~n,
color = ~free_crossover,
type = "bar") %>%
layout(
xaxis = list(title = "Free Crossover"),
yaxis = list(title = "Number of Stations")
)
## Warning in RColorBrewer::brewer.pal(N, "Set2"): minimal value for n is 3, returning requested palette with 3 different levels
## Warning in RColorBrewer::brewer.pal(N, "Set2"): minimal value for n is 3, returning requested palette with 3 different levels
subway_cleaned %>%
group_by(staffing) %>%
count(staffing) %>%
ungroup() %>%
mutate(staffing = fct_reorder(staffing, n, .desc = TRUE)) %>%
plot_ly(
x = ~staffing,
y = ~n,
color = ~staffing,
type = "bar",
colors = "viridis"
) %>%
layout(
xaxis = list(title = "Staffing"),
yaxis = list(title = "Number of Stations")
)
# Convert binary variable to factors
subway_cleaned = subway_cleaned %>%
mutate(
ada <- factor(ada, levels = c("FALSE", "TRUE")),
free_crossover <- factor(free_crossover, levels = c("FALSE", "TRUE"))
)
# Select the Variables for Clustering
clustering_data <- subway_cleaned %>%
dplyr::select(entrance_type, staffing, ada, free_crossover, station_latitude, station_longitude, station_name)
set.seed(123) # For reproducibility
km_result <- clustering_data %>%
dplyr::select(-station_latitude, -station_longitude, -station_name) %>%
kmodes(modes = 3, iter.max = 10)
clustering_data$.cluster <- factor(km_result$cluster)
# Define a Mode function
Mode <- function(x) {
ux <- unique(x)
ux[which.max(tabulate(match(x, ux)))]
}
cluster_profiles <- clustering_data %>%
group_by(.cluster) %>%
summarise(across(everything(), ~ Mode(.x)))
knitr::kable(cluster_profiles)
| .cluster | entrance_type | staffing | ada | free_crossover | station_latitude | station_longitude | station_name |
|---|---|---|---|---|---|---|---|
| 1 | Stair | FULL | FALSE | TRUE | 40.64136 | -74.01788 | 59th St |
| 2 | Stair | FULL | FALSE | TRUE | 40.66040 | -73.99809 | 25th St |
| 3 | Stair | FULL | FALSE | TRUE | 40.68367 | -73.97881 | 36th St |
# print(cluster_profiles)
# subway_cleaned_with_clusters <- subway_cleaned %>%
# mutate(cluster = km_result$cluster)
#
# # 2. Inspect cluster modes (the "central" values of each cluster)
# modes_df <- as.data.frame(km_result$modes)
# modes_df$cluster <- 1:nrow(modes_df)
# modes_df <- modes_df %>% select(cluster, everything())
#
# # Display cluster modes in a nice table
# kable(modes_df, caption = "Cluster Modes") %>%
# kable_styling(full_width = FALSE)
#
# # 3. Summarize the distribution of a key variable (e.g., entrance_type) across clusters
# entrance_distribution <- subway_cleaned_with_clusters %>%
# count(cluster, entrance_type) %>%
# group_by(cluster) %>%
# mutate(prop = n / sum(n))
#
# # Display as a table
# kable(entrance_distribution, caption = "Entrance Type Distribution by Cluster") %>%
# kable_styling(full_width = FALSE)
#
# # 4. Visualize the distribution of entrance_type by cluster
# ggplot(entrance_distribution, aes(x = factor(cluster), y = prop, fill = entrance_type)) +
# geom_col(position = "dodge") +
# scale_fill_viridis_d() +
# labs(
# title = "Distribution of Entrance Types by Cluster",
# x = "Cluster",
# y = "Proportion",
# fill = "Entrance Type"
# ) +
# theme_minimal()
#
# # 5. Similarly, visualize another variable, such as staffing, by cluster
# staffing_distribution <- subway_cleaned_with_clusters %>%
# count(cluster, staffing) %>%
# group_by(cluster) %>%
# mutate(prop = n / sum(n))
#
# ggplot(staffing_distribution, aes(x = factor(cluster), y = prop, fill = staffing)) +
# geom_col(position = "dodge") +
# scale_fill_viridis_d() +
# labs(
# title = "Distribution of Staffing Levels by Cluster",
# x = "Cluster",
# y = "Proportion",
# fill = "Staffing"
# ) +
# theme_minimal()
clustering_data <- clustering_data %>%
mutate(
accessibility_level = case_when(
.cluster == 1 ~ "High Accessibility",
.cluster == 2 ~ "Medium Accessibility",
.cluster == 3 ~ "Low Accessibility"
)
)
pal <- leaflet::colorFactor(
palette = c("chartreuse", "darkgoldenrod1", "brown2"), # Adjust colors as needed
domain = clustering_data$accessibility_level
)
leaflet() |>
addTiles() |>
addCircleMarkers(data = clustering_data,
lng = ~station_longitude,
lat = ~station_latitude,
label = ~station_name,
radius = 3,
color = NA,
# color = ~pal(accessibility_level),
fillColor = ~pal(accessibility_level),
stroke = TRUE, fillOpacity = 0.75,
popup = ~paste("Ada:", ada,
"<br> Staffing:", staffing,
"<br> Entrance type:", entrance_type,
"<br> Free crossover:", free_crossover)) |>
addProviderTiles(providers$CartoDB.Positron) |>
addLegend(
"bottomright",
pal = pal,
values = clustering_data$accessibility_level,
title = "Accessibility Level",
opacity = 1
)